To organise input data for segmented regression of age-standardised mortality rates. * Load packages * Load data * Ensure correctly labelled * Convert dates to identifiable quarters * Visualise trends
#install.packages("pacman")
pacman::p_load(
tidyverse,
segmented,
plotly
)
data <- read_csv("Data/QuarterlyASMR1990_2018.csv")
Parsed with column specification:
cols(
period = col_integer(),
sex = col_character(),
allageesprate = col_double(),
allagestderr = col_double(),
allagelcli = col_double(),
allageucli = col_double(),
under75esprate = col_double(),
under75stderr = col_double(),
under75lcli = col_double(),
under75ucli = col_double(),
totdeaths = col_integer()
)
data
glimpse(data)
Observations: 342
Variables: 11
$ period <int> 19901, 19901, 19901, 19902, 19902, 19902, 19903, 19903, 19903, 19904, 19904, 19904, 19911, 19911, 19...
$ sex <chr> "F", "M", "P", "F", "M", "P", "F", "M", "P", "F", "M", "P", "F", "M", "P", "F", "M", "P", "F", "M", ...
$ allageesprate <dbl> 1500.400, 2287.741, 1796.271, 1499.669, 2271.778, 1790.583, 1485.507, 2252.730, 1774.039, 1386.232, ...
$ allagestderr <dbl> 7.788046, 13.950236, 6.854256, 7.774313, 13.869048, 6.830757, 7.730787, 13.786253, 6.788831, 7.49520...
$ allagelcli <dbl> 1485.136, 2260.399, 1782.837, 1484.431, 2244.595, 1777.194, 1470.355, 2225.709, 1760.733, 1371.541, ...
$ allageucli <dbl> 1515.665, 2315.084, 1809.706, 1514.907, 2298.962, 1803.971, 1500.660, 2279.751, 1787.345, 1400.923, ...
$ under75esprate <dbl> 589.2652, 1010.2307, 777.9763, 590.4938, 1002.3490, 775.1230, 588.0220, 993.3030, 770.2041, 563.1330...
$ under75stderr <dbl> 4.994766, 7.222368, 4.242511, 4.998890, 7.183408, 4.230873, 4.985508, 7.136414, 4.212956, 4.878688, ...
$ under75lcli <dbl> 579.4755, 996.0749, 769.6609, 580.6960, 988.2696, 766.8305, 578.2504, 979.3157, 761.9467, 553.5707, ...
$ under75ucli <dbl> 599.0550, 1024.3866, 786.2916, 600.2917, 1016.4285, 783.4155, 597.7936, 1007.2904, 778.4615, 572.695...
$ totdeaths <int> 34163, 31360, 65523, 34216, 31247, 65463, 33997, 31069, 65066, 31910, 29617, 61527, 31664, 29208, 60...
data %>%
separate(period, into = c("year","quarter"), sep = 4) %>%
mutate(
year = as.double(year),
quarter = as.double(quarter)
) %>%
mutate(
time = year + (quarter / 4) - 0.125
) %>%
select(time, sex, totdeaths) -> tidied_totdeaths
tidied_totdeaths %>%
ggplot(
aes(
x = time,
y = totdeaths,
color = sex
)
) +
geom_line() +
coord_cartesian(
ylim = c(0, 70000)
) -> alldeathsplot
alldeathsplot %>%
ggplotly()
tidied_totdeaths %>%
filter(
sex != "P"
) %>%
ggplot(
aes(
x = time,
y = totdeaths,
color = sex
)
) +
geom_line() +
coord_cartesian(
ylim = c(0, 40000)
)
data %>%
separate(period, into = c("year","quarter"), sep = 4) %>%
mutate(
year = as.double(year),
quarter = as.double(quarter)
) %>%
mutate(
time = year + (quarter / 4) - 0.125
) %>%
select(time, sex, allageesprate, allagelcli, allageucli) -> tidied_allagerates
tidied_allagerates %>%
filter(
sex != "P"
) %>%
ggplot(
aes(
x = time,
group = sex
)
) +
geom_ribbon(
aes(
ymin = allagelcli,
ymax = allageucli,
fill = sex
)
) +
geom_line(
aes(y = allageesprate)
) +
coord_cartesian(
ylim = c(0, 3000)
)
tidied_allagerates %>%
rename(rate = allageesprate) %>%
filter(sex == "M") -> allagemale
lm(rate ~ time, data = allagemale) -> mod_male
tidied_allagerates %>%
rename(rate = allageesprate) %>%
filter(sex == "F") -> allagefemale
lm(rate ~ time, data = allagefemale) -> mod_female
summary (mod_male)
Call:
lm(formula = rate ~ time, data = allagemale)
Residuals:
Min 1Q Median 3Q Max
-68.73 -38.87 -16.80 36.84 142.55
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 69951.244 1164.509 60.07 <2e-16 ***
time -34.056 0.581 -58.62 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 51.04 on 112 degrees of freedom
Multiple R-squared: 0.9684, Adjusted R-squared: 0.9681
F-statistic: 3436 on 1 and 112 DF, p-value: < 2.2e-16
summary (mod_female)
Call:
lm(formula = rate ~ time, data = allagefemale)
Residuals:
Min 1Q Median 3Q Max
-56.519 -24.836 -3.137 17.272 89.669
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 34630.7949 734.8493 47.13 <2e-16 ***
time -16.6907 0.3666 -45.52 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 32.21 on 112 degrees of freedom
Multiple R-squared: 0.9487, Adjusted R-squared: 0.9483
F-statistic: 2072 on 1 and 112 DF, p-value: < 2.2e-16
segmented(mod_male, seg.Z = ~time, psi = 1994.125) -> maleonebreak
segmented(mod_female, seg.Z = ~time, psi = 1994.125) -> femaleonebreak
summary(maleonebreak)
***Regression Model with Segmented Relationship(s)***
Call:
segmented.lm(obj = mod_male, seg.Z = ~time, psi = 1994.125)
Estimated Break-Point(s):
Est. St.Err
2013.496 0.464
Meaningful coefficients of the linear terms:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 76334.045 1118.969 68.218 <2e-16 ***
time -37.249 0.559 -66.637 <2e-16 ***
U1.time 39.113 5.730 6.826 NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 36.76 on 110 degrees of freedom
Multiple R-Squared: 0.9839, Adjusted R-squared: 0.9835
Convergence attained in 2 iterations with relative change -1.957553e-16
summary(femaleonebreak)
***Regression Model with Segmented Relationship(s)***
Call:
segmented.lm(obj = mod_female, seg.Z = ~time, psi = 1994.125)
Estimated Break-Point(s):
Est. St.Err
2014.373 0.577
Meaningful coefficients of the linear terms:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37363.1522 793.9771 47.058 <2e-16 ***
time -18.0576 0.3966 -45.535 <2e-16 ***
U1.time 24.0687 5.4286 4.434 NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 27.34 on 110 degrees of freedom
Multiple R-Squared: 0.9637, Adjusted R-squared: 0.9627
Convergence attained in 5 iterations with relative change 1.770497e-16
tidied_allagerates %>%
filter(sex == "M") %>%
mutate(
male_onebreak_pred = predict(
maleonebreak,
newdata = tidied_allagerates %>%
filter(sex == "M") %>%
select(time)
)
) -> malepredicted
malepredicted %>%
ggplot(
aes(x = time)
) +
geom_point(
aes(y = allageesprate)
) +
geom_line(
aes(y = male_onebreak_pred)
)
obs_pred_both %>%
ggplot(
aes(x = time, color = sex)
) +
geom_point(
aes(y = observed)
) +
geom_line(
aes(y = predicted)
)
obs_pred_both %>%
mutate(residual=observed-predicted) %>%
ggplot(
aes(x = time, shape = sex, color=sex)
) +
geom_point(
aes(y = residual)
)+
ylim(c(-100,100))+
geom_hline(yintercept = 0)+
geom_vline(xintercept = 2013.496)+
geom_vline(xintercept = 2014.373, linetype= "dashed")
obs_pred_both %>%
mutate(residual=observed-predicted) %>%
ggplot(
aes(x = time)
) +
geom_point(
aes(y = residual)
) +
facet_wrap(~sex) +
ylim(c(-100,100))+
geom_hline(yintercept = 0)+
geom_vline(xintercept = maleonebreak[["psi"]][[2]])+
#geom_vline(xintercept = 2013.496)+
geom_vline(xintercept = femaleonebreak[["psi"]][[2]], linetype= "dashed")
#geom_vline(xintercept = 2014.373, linetype= "dashed")
segmented(mod_male, seg.Z = ~time, psi = c(1994, 2002)) -> maletwobreaks
segmented(mod_female, seg.Z = ~time, psi = c(1994, 2002)) -> femaletwobreaks